#include "config.h"
#include "iser.h"
#include "dbg.h"
#include "core.h"

extern int membuf_num;
extern size_t membuf_size;

/*
 * Compare two iSCSI command sequence numbers using serial-number
 * arithmetic (RFC 1982 style), so ordering stays correct across
 * 32-bit wrap-around.
 *
 * Returns 0 if equal, 1 if sn1 is logically after sn2, -1 if before.
 *
 * Fix: compute the difference in unsigned arithmetic and convert the
 * result; subtracting the values after casting each to int32_t (the
 * previous form) is undefined behavior on signed overflow when the two
 * sequence numbers are more than 2^31 apart.
 */
static inline int cmdsn_cmp(uint32_t sn1, uint32_t sn2)
{
        if (sn1 == sn2)
                return 0;

        return (int32_t)(sn1 - sn2) > 0 ? 1 : -1;
}

/* Lay out the iSER header, iSCSI BHS, AHS and data areas of a PDU over
 * one contiguous buffer; AHS and data sizes start at zero. */
static void iser_pdu_init(struct iser_pdu *pdu, void *buf)
{
        char *cur = buf;   /* byte cursor avoids void-pointer arithmetic */

        pdu->iser_hdr = (struct iser_hdr *) cur;
        cur += sizeof(struct iser_hdr);

        pdu->bhs = (struct iscsi_hdr *) cur;
        cur += sizeof(struct iscsi_hdr);

        pdu->ahs = cur;
        pdu->ahssize = 0;

        YASSERT(cur);
        pdu->membuf.addr = cur;
        pdu->membuf.size = 0;
}

static void iser_rxd_init(struct iser_work_req *rxd,
                          struct iser_task *task,
                          void *buf, unsigned size,
                          struct ibv_mr *srmr)
{
        rxd->task = task;
        rxd->iser_ib_op = ISER_IB_RECV;

        rxd->sge[0].addr = uint64_from_ptr(buf);
        rxd->sge[0].length = size;
        rxd->sge[0].lkey = srmr->lkey;

        rxd->recv_wr.wr_id = uint64_from_ptr(rxd);
        rxd->recv_wr.sg_list = rxd->sge;
        rxd->recv_wr.num_sge = 1;
        rxd->recv_wr.next = NULL;
}

static void iser_txd_init(struct iser_work_req *txd,
                          struct iser_task *task,
                          void *buf, unsigned size,
                          struct ibv_mr *srmr)
{
        txd->task = task;
        txd->iser_ib_op = ISER_IB_SEND;

        txd->sge[0].addr = uint64_from_ptr(buf);
        txd->sge[0].length = size;
        txd->sge[0].lkey = srmr->lkey;

        txd->send_wr.wr_id = uint64_from_ptr(txd);
        txd->send_wr.next = NULL;
        txd->send_wr.sg_list = txd->sge;
        txd->send_wr.num_sge = 1;
        txd->send_wr.opcode = IBV_WR_SEND;
        txd->send_wr.send_flags = IBV_SEND_SIGNALED;

        INIT_LIST_HEAD(&txd->wr_list);
}

/* Prepare the (partially filled) RDMA work request for a task.
 * The opcode, data pointers and remote address/rkey are filled in
 * right before posting. */
static void iser_rdmad_init(struct iser_work_req *rdmad,
                            struct iser_task *task,
                            struct ibv_mr *srmr)
{
        (void)srmr;     /* unused: lkey is taken from the conn MR at prep time */

        rdmad->task = task;

        rdmad->send_wr.next = NULL;
        rdmad->send_wr.wr_id = uint64_from_ptr(rdmad);
        rdmad->send_wr.sg_list = rdmad->sge;
        rdmad->send_wr.num_sge = 1;
        rdmad->send_wr.send_flags = IBV_SEND_SIGNALED;

        /* left unset until posting:
         * rdmad->iser_ib_op, rdmad->send_wr.opcode,
         * rdmad->sge.addr/length,
         * rdmad->send_wr.wr.rdma.(remote_addr,rkey) */

        INIT_LIST_HEAD(&rdmad->wr_list);
}

/* One-time initialization of a task: bind it to its connection, carve
 * the PDU headers out of pdu_buf, set up the rx/tx/rdma descriptors and
 * reset every list head the task participates in. */
void iser_task_init(struct iser_task *task,
                           struct iser_conn *conn,
                           void *pdu_buf,
                           unsigned long buf_size,
                           struct ibv_mr *srmr)
{
        struct list_head *heads[] = {
                &task->in_buf_list, &task->out_buf_list,
                &task->exec_list, &task->rdma_list,
                &task->tx_list, &task->recv_list,
                &task->session_list, &task->dout_task_list,
        };
        unsigned i;

        task->conn = conn;
        task->unsolicited = 0;
        /* keep tio NULL so a stale pointer cannot trip asserts later */
        task->scmd.tio = NULL;

        iser_pdu_init(&task->pdu, pdu_buf);

        iser_rxd_init(&task->rxd, task, pdu_buf, buf_size, srmr);
        iser_txd_init(&task->txd, task, pdu_buf, buf_size, srmr);
        iser_rdmad_init(&task->rdmad, task, conn->dev->membuf_mr);

        for (i = 0; i < sizeof(heads) / sizeof(heads[0]); i++)
                INIT_LIST_HEAD(heads[i]);
}

/* Append an RDMA data buffer at the tail of the task's outgoing list. */
static void iser_task_add_out_rdma_buf(struct iser_task *task,
                                       struct iser_membuf *data_buf,
                                       unsigned int offset)
{
        DBUG("task:%p offset:%d size:%d data_buf:%p add last\n",
                task, offset, data_buf->size, data_buf->addr);

        data_buf->offset = offset;
        list_add_tail(&data_buf->task_list, &task->out_buf_list);
        task->out_buf_num++;
}

/* Unlink a data buffer from the task's outgoing list. */
static void iser_task_del_out_buf(struct iser_task *task,
                                  struct iser_membuf *data_buf)
{
        DBUG("task:%p offset:%d size:%d data_buf:%p\n",
                task, data_buf->offset, data_buf->size, data_buf->addr);

        task->out_buf_num--;
        list_del(&data_buf->task_list);
}

/* Append an RDMA data buffer at the tail of the task's incoming list.
 *
 * Fix: the debug format previously read "offset:0x%d", mixing a hex
 * prefix with a decimal conversion; use plain %d like the sibling
 * buffer-list helpers. */
static void iser_task_add_in_rdma_buf(struct iser_task *task,
                                      struct iser_membuf *data_buf,
                                      unsigned int offset)
{
        DBUG("task:%p offset:%d size:%d data_buf:%p add last\n",
                task, offset, data_buf->size, data_buf->addr);
        data_buf->offset = offset;
        task->in_buf_num++;
        list_add_tail(&data_buf->task_list, &task->in_buf_list);
}

/* Unlink a data buffer from the task's incoming list. */
static void iser_task_del_in_buf(struct iser_task *task,
                                 struct iser_membuf *data_buf)
{
        DBUG("task:%p offset:%d size:%d data_buf:%p\n",
                task, data_buf->offset, data_buf->size, data_buf->addr);

        task->in_buf_num--;
        list_del(&data_buf->task_list);
}

/* Return a read (RDMA-Write) buffer to the device pool; if the device
 * was starved for buffers, kick the buffer-allocation event so parked
 * tasks get retried. */
static void iser_task_free_read_rdma_buf(struct iser_task *task, struct iser_membuf *rdma_buf)
{
        struct iser_conn *conn = task->conn;
        struct iser_device *dev = conn->dev;

        iser_dev_free_read_rdma_buf(dev, rdma_buf);

        if (likely(!dev->waiting_for_mem))
                return;

        dev->waiting_for_mem = 0;
        iser_sched_add_event(&conn->sched_buf_alloc);
}

/* Return a write (RDMA-Read) buffer to the device pool; if the device
 * was starved for buffers, kick the buffer-allocation event so parked
 * tasks get retried. */
static void iser_task_free_write_rdma_buf(struct iser_task *task, struct iser_membuf *rdma_buf)
{
        struct iser_conn *conn = task->conn;
        struct iser_device *dev = conn->dev;

        iser_dev_free_write_rdma_buf(dev, rdma_buf);

        if (likely(!dev->waiting_for_mem))
                return;

        dev->waiting_for_mem = 0;
        iser_sched_add_event(&conn->sched_buf_alloc);
}

/* Release every incoming RDMA buffer still attached to the task. */
static void iser_task_free_in_bufs(struct iser_task *task)
{
        struct iser_membuf *membuf, *next;

        list_for_each_entry_safe(membuf, next, &task->in_buf_list, task_list) {
                iser_task_del_in_buf(task, membuf);
                iser_task_free_read_rdma_buf(task, membuf);
        }

        YASSERT(task->in_buf_num == 0);
}

/* Release every outgoing buffer still attached to the task.  Only
 * buffers that came from the RDMA pool are returned to the device;
 * anything else is unexpected and merely logged. */
static void iser_task_free_out_bufs(struct iser_task *task)
{
        struct iser_membuf *membuf, *next;

        list_for_each_entry_safe(membuf, next, &task->out_buf_list, task_list) {
                iser_task_del_out_buf(task, membuf);
                if (!membuf->rdma) {
                        DWARN("free out bufs not use rdma\n");
                        continue;
                }
                iser_task_free_write_rdma_buf(task, membuf);
        }

        YASSERT(task->out_buf_num == 0);
}

/* Insert a PDU data buffer into the task's outgoing list, keeping the
 * list sorted by buffer offset (ascending). */
void iser_task_add_out_pdu_buf(struct iser_task *task,
                                      struct iser_membuf *data_buf,
                                      unsigned int offset)
{
        struct iser_membuf *cur_buf;

        data_buf->offset = offset;
        task->out_buf_num++;

        /* walk to the first entry with a larger offset and insert in
         * front of it; an empty list falls through to the tail add */
        list_for_each_entry(cur_buf, &task->out_buf_list, task_list) {
                if (offset >= cur_buf->offset)
                        continue;
                DINFO("task:%p offset:%d size:%d data_buf:%p add before:%p\n",
                        task, offset, data_buf->size, data_buf->addr, cur_buf->addr);
                list_add_tail(&data_buf->task_list, &cur_buf->task_list);
                return;
        }

        DINFO("task:%p offset:%d size:%d data_buf:%p add last\n",
                task, offset, data_buf->size, data_buf->addr);
        list_add_tail(&data_buf->task_list, &task->out_buf_list);
}

/*
 * Final teardown of a task after its response has been sent.
 *
 * Only SCSI command tasks need cleanup: they are removed from the
 * session's task list, their RDMA buffers are returned to the device
 * pools, and the connection reference taken while the command was in
 * SCSI processing is dropped.  All other opcodes return immediately.
 */
void iser_task_complete(struct iser_task *task)
{
        struct iser_conn *conn = task->conn;

        if (unlikely(task->opcode != ISCSI_OP_SCSI_REQ)) {
                //DINFO("task:%p, non-cmd\n", task);
                return;
        }

        list_del(&task->session_list);
        if (task->is_read)
                iser_task_free_in_bufs(task);
        if (task->is_write) {
                iser_task_free_out_bufs(task);
                /* iser_task_free_dout_tasks(task); // ToDo: multiple out buffers */
        }

        /* we are completing scsi cmd task, returning from target */
        if (likely(task_in_scsi(task))) {

                //target_cmd_done(&task->scmd);

                clear_task_in_scsi(task);
                /* drop the connection reference held during SCSI processing */
                iser_conn_put(conn);
        }

        /* extdata is heap-allocated per task when present; release it */
        if (unlikely(task->extdata)) {
                free(task->extdata);
                task->extdata = NULL;
        }
}

/*
 * Allocate the RDMA buffers a SCSI command task needs.
 *
 * A write (or bidir) task takes a buffer from the write pool to receive
 * RDMA-Read data from the initiator; a read (or bidir) task takes a
 * buffer from the read pool for the RDMA-Write reply.  Once buffers are
 * in place the task either schedules an RDMA-Read, keeps waiting for
 * outstanding Data-Outs, or goes straight to I/O submission.
 *
 * Returns 0 on success, -E2BIG if a transfer exceeds membuf_size, or
 * -ENOMEM when a pool is exhausted (the device is then flagged so the
 * task is retried when buffers are freed).
 *
 * Fix: the NULL check on rdma_rd_buf now precedes mbuffer_init();
 * previously an exhausted write pool caused a NULL dereference.
 */
int iser_task_alloc_rdma_bufs(struct iser_task *task)
{
        struct iser_conn *conn = task->conn;
        struct iser_membuf *rdma_wr_buf = NULL, *rdma_rd_buf = NULL;

        if (task->is_write && task->rdma_rd_sz > 0) {
                /* ToDo: multiple RDMA-Read buffers */
                if (unlikely(task->rdma_rd_sz > (int)membuf_size)) {
                        DERROR("conn:%p task:%p tag:0x%04"PRIx64 ", "
                                "rdma-rd size:%u too big\n",
                                conn, task, task->tag, task->rdma_rd_sz);
                        return -E2BIG;
                }
                rdma_rd_buf = iser_dev_alloc_write_rdma_buf(conn->dev);
                if (unlikely(rdma_rd_buf == NULL))
                        goto no_mem_ret;
                mbuffer_init(&rdma_rd_buf->buf, task->rdma_rd_sz);

                /* if this is a bidir task, allocation of the rdma_wr buffer
                 * may still fail, thus don't add the buffer to the task yet */
        }

        if (task->is_read && task->rdma_wr_sz > 0) {
                /* ToDo: multiple RDMA-Write buffers */
                if (unlikely(task->rdma_wr_sz > (int)membuf_size)) {
                        DERROR("conn:%p task:%p tag:0x%04"PRIx64 ", "
                                "rdma-wr size:%u too big\n",
                                conn, task, task->tag, task->rdma_wr_sz);
                        return -E2BIG;
                }
                rdma_wr_buf = iser_dev_alloc_read_rdma_buf(conn->dev);
                if (unlikely(rdma_wr_buf == NULL))
                        goto no_mem_ret;

                iser_task_add_in_rdma_buf(task, rdma_wr_buf, 0);
                DBUG("conn:%p task:%p tag:0x%04"PRIx64 ", "
                        "rdma-wr buf:%p sz:%u\n",
                        conn, task, task->tag,
                        rdma_wr_buf->addr, rdma_wr_buf->size);
        }

        /* With a write or bidir task there may be an rdma portion and
         * data-outs, independently.  If data-outs remain, wait until
         * they all arrive before submitting the I/O. */
        if (task->is_write) {
                if (rdma_rd_buf) {
                        /* ToDo: multiple RDMA-Read buffers */
                        iser_task_add_out_rdma_buf(task, rdma_rd_buf,
                                                   task->unsol_sz);
                        DBUG("conn:%p task:%p tag:0x%04"PRIx64 ", "
                                "rdma-rd buf:%p sz:%u\n",
                                conn, task, task->tag,
                                rdma_rd_buf->addr, rdma_rd_buf->size);
                        schedule_rdma_read(task, conn);
                        return 0;
                }
                if (task->unsol_remains > 0)
                        return 0;
        }

        schedule_task_iosubmit(task, conn);
        return 0;

no_mem_ret:
        /* a bidir task may have taken the rd buffer before the wr
         * allocation failed; give it back before parking the task */
        if (rdma_rd_buf)
                iser_dev_free_write_rdma_buf(conn->dev, rdma_rd_buf);
        conn->dev->waiting_for_mem = 1;

        DERROR("conn:%p task:%p tag:0x%04"PRIx64 ", free list empty\n",
                conn, task, task->tag);
        return -ENOMEM;
}

/* Initialize a task used for target-initiated (unsolicited) PDUs:
 * only the tx descriptor and PDU layout are needed. */
void iser_task_unsolicited_init(struct iser_task *task,
                                       struct iser_conn *conn,
                                       void *pdu_data_buf,
                                       unsigned long buf_size,
                                       struct ibv_mr *srmr)
{
        task->unsolicited = 1;
        task->conn = conn;

        iser_pdu_init(&task->pdu, pdu_data_buf);
        iser_txd_init(&task->txd, task, pdu_data_buf, buf_size, srmr);
}

int iser_task_delivery(struct iser_task *task)
{
        int ret;
        struct iser_conn *conn = task->conn;
        
        if (likely(task->opcode == ISCSI_OP_SCSI_REQ)) {
                ret = iser_task_alloc_rdma_bufs(task);
                if (unlikely(ret == -ENOMEM))
                        list_add_tail(&task->exec_list,
                                      &task->conn->buf_alloc_list);
                return ret;
        }

        switch (task->opcode) {
        case ISCSI_OP_NOP_OUT:
                ret = iser_nop_out_exec(task);
                break;
        case ISCSI_OP_LOGOUT_REQ:
                ret = iser_logout_exec(task);
                break;
        case ISCSI_OP_SCSI_TASK_MGT_REQ:
                ret = iser_tm_exec(task);
                break;
        case ISCSI_OP_TEXT_REQ:
                ret = iser_text_exec(&conn->h, &task->pdu, &conn->text_tx_task->pdu);
                schedule_resp_tx(conn->text_tx_task, conn);
                break;
        default:
                DERROR("Internal retor: Unexpected op:0x%x\n", task->opcode);
                ret = -EINVAL;
                break;
        }
        return ret;
}

/* queues the task according to cmd-sn, no exec here */
int iser_task_queue(struct iscsi_session *session,
                           struct iser_task *task)
{
        uint32_t cmd_sn = task->cmd_sn;
        struct list_head *cmp_entry;
        int ret;

        if (unlikely(task->is_immediate)) {
                DBUG("exec imm task task:%p tag:0x%0"PRIx64 " cmd_sn:0x%x\n",
                        task, task->tag, cmd_sn);
                ret = iser_task_delivery(task);
                if (likely(!ret || ret == -ENOMEM))
                        return 0;
                else
                        return ret;
        }
                                                                
        /* if the current command is the expected one, exec it
         * and all others possibly acumulated on the queue */
        while (session->exp_cmd_sn == cmd_sn) {
                session->exp_cmd_sn++;
                DBUG("exec task:%p cmd_sn:0x%x\n", task, cmd_sn);
                ret = iser_task_delivery(task);
                if (unlikely(ret && ret != -ENOMEM)) {
                        /* when no free buffers remains, the task will wait
                         * on queue, so it is not a real retor, but we should
                         * not attempt to start other tasks until more
                         * memory becomes available */
                        return ret;
                        /* ToDo: what if there are more tasks in case of retor */
                }

                if (list_empty(&session->pending_cmd_list))
                        return 0;

                task = list_entry(session->pending_cmd_list.next,
                                        struct iser_task, exec_list);
                list_del(&task->exec_list);
                clear_task_pending(task);
                cmd_sn = be32_to_cpu(task->pdu.bhs->sn);
        }

        /* cmd_sn > (exp_cmd_sn+ISER_MAX_QUEUE_CMD), i.e. beyond allowed window */
        if (unlikely(cmdsn_cmp(cmd_sn, session->exp_cmd_sn+ISER_MAX_QUEUE_CMD) == 1)) {
                DERROR("unexpected cmd_sn:0x%x, max:0x%x\n",
                        cmd_sn, session->exp_cmd_sn+ISER_MAX_QUEUE_CMD);
                return -EINVAL;
        }

        /* insert the current task, ordered by cmd_sn */
        list_for_each_prev(cmp_entry, &session->pending_cmd_list) {
                struct iser_task *cmp_task;
                uint32_t cmp_cmd_sn;
                int cmp_res;

                cmp_task = list_entry(cmp_entry, struct iser_task, exec_list);
                cmp_cmd_sn = cmp_task->cmd_sn;

                cmp_res = cmdsn_cmp(cmd_sn, cmp_cmd_sn);
                if (cmp_res == 1) { /* cmd_sn > cmp_cmd_sn */
                        DINFO("inserted cmdsn:0x%x after cmdsn:0x%x\n",
                                cmd_sn, cmp_cmd_sn);
                        break;
                } else if (cmp_res == -1) { /* cmd_sn < cmp_cmd_sn */
                        DINFO("inserting cmdsn:0x%x skip cmdsn:0x%x\n",
                                cmd_sn, cmp_cmd_sn);
                        continue;
                } else { /* cmd_sn == cmp_cmd_sn */
                        DERROR("duplicate cmd_sn:0x%x, exp:%u\n",
                                cmd_sn, session->exp_cmd_sn);
                        return -EINVAL;
                }
        }

        list_add(&task->exec_list, cmp_entry);
        set_task_pending(task);
        return 0;
}

/*
 * Event handler: retry RDMA buffer allocation for tasks parked on the
 * connection's buf_alloc_list because the device pools were empty.
 * Tasks are retried in FIFO order; the loop stops at the first failure
 * (-ENOMEM keeps the task queued; any other error closes the conn).
 */
void iser_sched_buf_alloc(struct event_data *evt)
{
        int ret;
        struct iser_conn *conn = (struct iser_conn *) evt->data;
        struct iser_task *task;

        while (!list_empty(&conn->buf_alloc_list)) {
                task = list_entry(conn->buf_alloc_list.next,
                                        struct iser_task,
                                        exec_list);
                ret = iser_task_alloc_rdma_bufs(task);
                if (likely(!ret))
                        list_del(&task->exec_list);
                else {
                        /* -ENOMEM: leave the task at the head for the
                         * next retry; anything else is fatal */
                        if (ret != -ENOMEM)
                                iser_conn_close(conn);
                        break;
                }
        }
}

/* Event handler: deferred connection teardown.  The actual free is
 * handed off to a worker task so it does not run in event context.
 * (The direct iser_conn_free() call was a dead "#if 0" arm; removed.) */
void iser_sched_conn_free(struct event_data *evt)
{
        struct iser_conn *conn = (struct iser_conn *) evt->data;

        if (conn == NULL)
                return;

        /* clear the pointer so a re-fired event is a no-op */
        evt->data = NULL;
        schedule_task_new("iser_conn_free", iser_conn_free, conn, -1);
}

/* Whether this SCSI command may be batched with a neighbor in a single
 * submission: plain reads, writes and cache-sync commands only. */
static inline int task_can_batch(struct iser_task *task)
{
        struct iscsi_scsi_cmd_hdr *req_bhs =
                (struct iscsi_scsi_cmd_hdr *) task->pdu.bhs;
        unsigned char op = req_bhs->scb[0];

        if (op == SYNCHRONIZE_CACHE || op == SYNCHRONIZE_CACHE_16)
                return 1;
        if (op == WRITE_6 || op == WRITE_10 || op == WRITE_12 || op == WRITE_16)
                return 1;
        if (op == READ_6 || op == READ_10 || op == READ_12 || op == READ_16)
                return 1;

        return 0;
}

void iser_task_free_dout_tasks(struct iser_task *task)
{
        struct iser_conn *conn = task->conn;
        struct iser_task *dout_task, *tnext;

        list_for_each_entry_safe(dout_task, tnext, &task->dout_task_list, dout_task_list) {
                list_del(&dout_task->dout_task_list);
                iser_task_del_out_buf(task, &dout_task->pdu.membuf);
                schedule_post_recv(dout_task, conn);
        }
}

/* Event handler: drain the connection's I/O submission queue.
 * Consecutive tasks whose commands both allow batching are submitted
 * without a flush in between; the batch's last task triggers the
 * actual submission. */
void iser_sched_iosubmit(struct event_data *evt)
{
        struct iser_conn *conn = (struct iser_conn *) evt->data;
        struct iser_task *task, *next;

        list_for_each_entry_safe(task, next, &conn->iosubmit_list, exec_list) {
                int batch_with_next;

                if (&next->exec_list == &conn->iosubmit_list)
                        batch_with_next = 0; /* end of list */
                else
                        batch_with_next = task_can_batch(task) &&
                                          task_can_batch(next);

                list_del(&task->exec_list);
                iser_scsi_cmd_iosubmit(task, batch_with_next);
        }
}

/*
 * Event handler: re-post receive buffers to the QP.
 *
 * Every task on post_recv_list is unlinked and its receive work request
 * chained onto the previous one via recv_wr.next; the whole chain is
 * then posted with one call, starting from the first task.
 */
void iser_sched_post_recv(struct event_data *evt)
{
        int ret;
        struct iser_conn *conn = (struct iser_conn *) evt->data;
        struct iser_task *first_task = NULL;
        struct iser_task *prev_task = NULL;
        struct iser_task *task;
        int num_recv_bufs = 0;

        if (unlikely(conn->h.state == STATE_CLOSE || conn->h.state == STATE_CLOSED)) {
                iser_conn_close(conn);
                DINFO("conn:%p closing, ignoring post recv\n", conn);
                return;
        }

        while (!list_empty(&conn->post_recv_list)) {
                task = list_entry(conn->post_recv_list.next,
                                        struct iser_task,
                                        recv_list);
                list_del(&task->recv_list);

                /* link this wr after the previous one in the chain */
                if (prev_task == NULL)
                        first_task = task;
                else
                        prev_task->rxd.recv_wr.next = &task->rxd.recv_wr;

                prev_task = task;
                num_recv_bufs++;
        }
        if (likely(prev_task)) {
                /* terminate the chain before posting */
                prev_task->rxd.recv_wr.next = NULL;
                /* post the chain of recv buffers, start from the first */
                ret = iser_dev_post_recv(conn, first_task, num_recv_bufs);
                (void) ret;

                /* ToDo: error handling */
        }
}

/*
 * Fill in the task's RDMA work request for an RDMA-Read that pulls the
 * initiator's write data into the task's last out-buffer.
 *
 * NOTE(review): only one SGE is built, from the FIRST segment of
 * rdma_buf->buf.list — this assumes the membuf is a single contiguous
 * segment covering rdma_rd_sz bytes; confirm for multi-segment buffers.
 */
static void iser_prep_rdma_rd_send_req(struct iser_task *task,
                                       struct iser_work_req *next_wr,
                                       int signaled)
{
        struct iser_work_req *rdmad = &task->rdmad;
        struct iser_membuf *rdma_buf;
        uint64_t offset;

        rdmad->iser_ib_op = ISER_IB_RDMA_READ;

        /* RDMA-Read buffer is always put at the list's tail */
        /* ToDo: multiple RDMA-Write buffers */
        rdma_buf = container_of(task->out_buf_list.prev,
                                struct iser_membuf, task_list);

        /* local address: first segment of the membuf plus the task offset */
        offset = rdma_buf->offset;
        rdmad->sge[0].addr = uint64_from_ptr(((seg_t *)(rdma_buf->buf.list.next))->handler.ptr) + offset;
        rdmad->sge[0].lkey = task->conn->mr->lkey;

        /* task->rdma_rd_offset += cur_req_sz; // ToDo: multiple RDMA-Write buffers */

        rdmad->sge[0].length = task->rdma_rd_sz;

        rdmad->send_wr.num_sge = 1;
        rdmad->send_wr.next = (next_wr ? &next_wr->send_wr : NULL);
        rdmad->send_wr.opcode = IBV_WR_RDMA_READ;
        rdmad->send_wr.send_flags = (signaled ? IBV_SEND_SIGNALED : 0);
        /* remote side: the initiator's advertised write VA/STag */
        rdmad->send_wr.wr.rdma.remote_addr =
                (uint64_t) task->rem_write_va;
                /* + (offset - task->unsol_sz); // ToDo: multiple RDMA-Write buffers */
        rdmad->send_wr.wr.rdma.rkey =
                (uint32_t) task->rem_write_stag;

        DBUG("task:%p wr_id:0x%"PRIx64 " tag:0x%04"PRIx64 " daddr:0x%"PRIx64 " dsz:%u "
                "bufsz:%u rdma:%d lkey:%x raddr:%"PRIx64 " rkey:%x rems:%u\n",
                task, rdmad->send_wr.wr_id, task->tag, rdmad->sge[0].addr,
                rdmad->sge[0].length, rdma_buf->size, rdma_buf->rdma,
                rdmad->sge[0].lkey, rdmad->send_wr.wr.rdma.remote_addr,
                rdmad->send_wr.wr.rdma.rkey, task->rdma_rd_sz);
}

/* Event handler: chain all pending RDMA-Read work requests of this
 * connection and post them with a single ibverbs call. */
void iser_sched_rdma_rd(struct event_data *evt)
{
        struct iser_conn *conn = (struct iser_conn *) evt->data;
        struct iser_work_req *first_wr = NULL;
        struct iser_task *prev = NULL;
        struct iser_task *task;
        int num_reqs = 0;
        int ret;

        if (unlikely(conn->h.state == STATE_CLOSE || conn->h.state == STATE_CLOSED)) {
                DINFO("conn:%p closing, ignoring rdma rd\n", conn);
                iser_conn_close(conn);
                return;
        }

        while (!list_empty(&conn->rdma_rd_list)) {
                task = list_entry(conn->rdma_rd_list.next,
                                        struct iser_task, rdma_list);
                list_del(&task->rdma_list);

                iser_prep_rdma_rd_send_req(task, NULL, 1);
                if (first_wr == NULL)
                        first_wr = &task->rdmad;
                else
                        prev->rdmad.send_wr.next = &task->rdmad.send_wr;
                prev = task;
                num_reqs++;
        }

        if (prev == NULL)
                return;

        /* terminate and submit the chain, starting from the first wr */
        prev->rdmad.send_wr.next = NULL;
        ret = iser_dev_post_send(conn, first_wr, num_reqs);
        (void) ret;

        /* ToDo: error handling */
}

/*
 * Finalize the response PDU and its send work request: write the AHS
 * and data lengths into the BHS, size the single SGE to cover
 * headers + AHS + data, and stamp a fresh iSER control header.
 * The caller decides chaining (next_wr) and signaling.
 */
static void iser_prep_resp_send_req(struct iser_task *task,
                                    struct iser_work_req *next_wr,
                                    int signaled)
{
        struct iser_pdu *pdu = &task->pdu;
        struct iser_hdr *iser_hdr = pdu->iser_hdr;
        struct iscsi_hdr *bhs = pdu->bhs;
        struct iser_work_req *txd = &task->txd;

        /* BHS carries AHS length in 4-byte words, data length in bytes */
        bhs->ahssize = pdu->ahssize / 4;
        hton24(bhs->datasize, pdu->membuf.size);

        /* one SGE spanning the contiguous headers+AHS+data region */
        txd->sge[0].length = ISER_HDRS_SZ;
        txd->sge[0].length += pdu->ahssize;
        txd->sge[0].length += pdu->membuf.size;

        memset(iser_hdr, 0, sizeof(*iser_hdr));
        iser_hdr->flags = ISER_ISCSI_CTRL;

        txd->send_wr.next = (next_wr ? &next_wr->send_wr : NULL);
        txd->send_wr.send_flags = (signaled ? IBV_SEND_SIGNALED : 0);

        DBUG("task:%p wr_id:0x%"PRIx64 " tag:0x%04"PRIx64 " dtbuf:%p "
                "dtsz:%u ahs_sz:%u stat:0x%x statsn:0x%x expcmdsn:0x%x\n",
                task, txd->send_wr.wr_id, task->tag,
                pdu->membuf.addr, pdu->membuf.size, pdu->ahssize,
                bhs->spec1[1], ntohl(bhs->sn), ntohl(bhs->exp_sn));
}

/*
 * Fill in the task's RDMA work request for an RDMA-Write that pushes
 * read data from the task's in-buffer to the initiator.
 *
 * One SGE is built per segment of the membuf's segment list.
 *
 * NOTE(review): sge[i] is written inside the loop and the bound is only
 * asserted afterwards (YASSERT(i <= 2)); a membuf with more segments
 * than the sge array holds would overwrite memory before the assert
 * fires — confirm the sge array size and segment-count invariant.
 */
static void iser_prep_rdma_wr_send_req(struct iser_task *task,
                                       struct iser_work_req *next_wr,
                                       int signaled)
{
        struct iser_work_req *rdmad = &task->rdmad;
        struct iser_membuf *rdma_buf;
        uint64_t offset = 0; /* ToDo: multiple RDMA-Write buffers */
        struct list_head  *pos;
        seg_t *seg;
        int i = 0;

        rdmad->iser_ib_op = ISER_IB_RDMA_WRITE;

        /* RDMA-Write buffer is the only one on the list */
        /* ToDo: multiple RDMA-Write buffers, use rdma_buf->offset */
        rdma_buf = list_entry(task->in_buf_list.next, struct iser_membuf, task_list);

        /* one SGE per membuf segment */
        list_for_each(pos, &rdma_buf->buf.list) {
                seg = (seg_t *)pos;

                rdmad->sge[i].addr = uint64_from_ptr(seg->handler.ptr);
                rdmad->sge[i].lkey = task->conn->mr->lkey;
                rdmad->sge[i].length = seg->len;
                i++;
        }

        YASSERT(i <= 2);
        rdmad->send_wr.num_sge = i;

        /* the whole transfer is covered by this single wr */
        task->rdma_wr_remains = 0;

        rdmad->send_wr.next = (next_wr ? &next_wr->send_wr : NULL);
        rdmad->send_wr.opcode = IBV_WR_RDMA_WRITE;
        rdmad->send_wr.send_flags = (signaled ? IBV_SEND_SIGNALED : 0);

        /* remote side: the initiator's advertised read VA/STag */
        rdmad->send_wr.wr.rdma.remote_addr = task->rem_read_va + offset;
        /* offset += rdmad->sge.length // ToDo: multiple RDMA-Write buffers */
        rdmad->send_wr.wr.rdma.rkey = task->rem_read_stag;

        DBUG(" task:%p tag:0x%04"PRIx64 " wr_id:0x%"PRIx64 " daddr:0x%"PRIx64 " dsz:%u "
                "bufsz:%u rdma:%d lkey:%x raddr:%"PRIx64 " rkey:%x rems:%u offset \n",
                task, task->tag, rdmad->send_wr.wr_id, rdmad->sge[0].addr,
                rdmad->sge[0].length, rdma_buf->buf.len, rdma_buf->rdma,
                rdmad->sge[0].lkey, rdmad->send_wr.wr.rdma.remote_addr,
                rdmad->send_wr.wr.rdma.rkey, task->rdma_wr_remains);
}

/*
 * Event handler: transmit queued responses.
 *
 * Each task on resp_tx_list is moved to sent_list and its send wr is
 * appended to one chain.  For a read task with pending RDMA-Write data
 * the rdmad wr is placed first and internally chained to the task's
 * txd (so data lands before the response); the previous task's txd
 * always links to the current task's first wr.  The whole chain is
 * posted with a single call, unless rdma is shutting down.
 */
void iser_sched_tx(struct event_data *evt)
{
        int ret;
        struct iser_conn *conn = (struct iser_conn *) evt->data;
        struct iser_work_req *first_wr = NULL;
        struct iser_task *prev_task = NULL;
        struct iser_task *task;
        struct iser_work_req *cur_send_wr;
        int num_reqs = 0;

        if (unlikely(conn->h.state == STATE_CLOSE || conn->h.state == STATE_CLOSED)) {
                DINFO("conn:%p closing, ignoring tx\n", conn);
                iser_conn_close(conn);
                return;
        }

        while (!list_empty(&conn->resp_tx_list)) {
                task = list_entry(conn->resp_tx_list.next,
                                        struct iser_task,
                                        tx_list);
                list_del(&task->tx_list);
                list_add_tail(&task->tx_list, &conn->sent_list);

                /* rdma-wr first, chained to this task's txd */
                if (task->is_read && task->rdma_wr_remains) {
                        iser_prep_rdma_wr_send_req(task, &task->txd, 1);
                        cur_send_wr = &task->rdmad;
                        num_reqs++;
                } else
                        cur_send_wr = &task->txd;

                if (prev_task == NULL)
                        first_wr = cur_send_wr;
                else
                        prev_task->txd.send_wr.next = &cur_send_wr->send_wr;

                iser_prep_resp_send_req(task, NULL, 1);
                prev_task = task;
                num_reqs++;
        }

        if (likely(prev_task && rdma_running)) {
                /* terminate the chain before posting */
                prev_task->txd.send_wr.next = NULL;
                /* submit the chain of rdma-wr & tx reqs, start from the first */
                ret = iser_dev_post_send(conn, first_wr, num_reqs);
                (void) ret;

                /* ToDo: error handling */
        }
}

